Workflow

The plague-phylogeography Snakemake pipeline.

Click the nodes to obtain details about each step.

Clock

RTT

Rate

Skyline

Time Tree

Geo

Spreadmap

Mugration

Branch Major

Statistics

If the workflow was executed on a cluster or in the cloud, the reported runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Snakemake Configuration File

# SQLITE Parameters
# Each sqlite_select_command_* value below is a multi-line plain (unquoted)
# YAML scalar: the parser folds its line breaks into single spaces, so every
# query is loaded as one line of SQL. Do not place comments or blank lines
# inside a query; they would end or alter the scalar.

# File name of the SQLite database.
# NOTE(review): presumably the database the queries below run against, and
# resolved relative to a directory chosen by the Snakefile; confirm there.
sqlite_db : "yersinia_pestis_db.sqlite"
# Selects AssemblyFTPGenbank for BioSample rows whose BioSampleComment matches
# '%KEEP%Assembly%Modern%' and whose AssemblyFTPGenbank is non-empty.
sqlite_select_command_asm : SELECT
                              AssemblyFTPGenbank
                            FROM
                              BioSample
                            LEFT JOIN
                              Assembly ON AssemblyBioSampleAccession = BioSampleAccession
                            WHERE
                              (BioSampleComment LIKE '%KEEP%Assembly%Modern%' AND
                               length(AssemblyFTPGenbank) > 0)
# Selects BioSampleAccession and SRARunAccession for exactly two hard-coded
# SRA runs (SRR1048902 and SRR1048905).
sqlite_select_command_sra : SELECT
                              BioSampleAccession,
                              SRARunAccession
                            FROM
                              BioSample
                            LEFT JOIN SRA
                              ON SRABioSampleAccession = BioSampleAccession
                            WHERE
                              (SRARunAccession = 'SRR1048902' OR
                               SRARunAccession = 'SRR1048905')
# Selects BioSampleAccession for BioSample rows whose BioSampleComment matches
# '%Local%Test%'.
sqlite_select_command_local : SELECT
                              BioSampleAccession
                            FROM
                              BioSample
                            WHERE
                              (BioSampleComment LIKE '%Local%Test%')
# Selects AssemblyFTPGenbank for BioSample rows whose BioSampleComment matches
# '%Assembly%Modern%Reference%'. Unlike the assembly query above, this one does
# not filter out empty AssemblyFTPGenbank values.
sqlite_select_command_ref : SELECT
                              AssemblyFTPGenbank
                            FROM
                              BioSample
                            LEFT JOIN Assembly
                              ON AssemblyBioSampleAccession = BioSampleAccession
                            WHERE
                              (BioSampleComment LIKE '%Assembly%Modern%Reference%')

# Dataset size
# NOTE(review): presumably upper limits on how many assembly / SRA datasets
# the pipeline processes; confirm against the Snakefile that reads them.
max_datasets_assembly : 3
max_datasets_sra : 2
# List of read-origin labels. The three names mirror the asm / sra / local
# SQL queries in this file.
# NOTE(review): how each label maps to a query is not visible here; confirm.
reads_origin :
  - "assembly"
  - "sra"
  - "local"

# Miscellaneous filtering
# NOTE(review): units are not stated in this file; threshold is presumably a
# percent identity and length a size in base pairs. Confirm with the rule
# that consumes them.
detect_repeats_threshold : 90
detect_repeats_length : 50

# Reference locus: accession, display name, and start/end coordinates.
# The start and end values are quoted, so YAML loads them as strings rather
# than integers; any consumer needing numbers must convert them.
reference_locus : "AL590842"
reference_locus_name : "chromosome"
reference_locus_start : "0"
reference_locus_end : "4653728"

# nf-core/eager parameters
# Pipeline revision, quoted so it is loaded as a string.
eager_rev: "2.2.1"
# NOTE(review): the next three names suggest a minimum read length after
# clipping and the bwa aln -n / -l settings; confirm how the Snakefile
# forwards them to nf-core/eager.
eager_clip_readlength : 35
eager_bwaalnn : 0.01
eager_bwaalnl : 16
# Extra option string; the value already contains the leading "--".
# NOTE(review): presumably appended verbatim to the eager command line.
eager_other : '--mergedonly'
organism : "Yersinia pestis"
# Adapter Removal Defaults
# Forward and reverse adapter sequences. The forward adapter contains a run
# of six N characters.
eager_forward_adapter : 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG'
eager_reverse_adapter : 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT'

# snippy parameters
# NOTE(review): meanings below are inferred from the key names only (depth,
# base/mapping quality and allele-fraction thresholds, masking character,
# missing-data and SNP-density filters); confirm units and semantics in the
# rules that use them.
snippy_ctg_depth : 10
snippy_bam_depth : 3
snippy_base_qual : 20
snippy_map_qual : 30
snippy_min_frac : 0.9
snippy_mask_char : "X"
snippy_missing_data : 50
snippy_snp_density : 10

# Singleton handling: the value is a literal command-line flag.
# Make this an empty string if removing singletons
# snippy_keep_singleton: ""
snippy_keep_singleton : "--keep-singleton"
# List of missing-data values (0 to 100 in steps of 25).
# NOTE(review): presumably percentages for which plots are produced; confirm.
snippy_multi_plot_missing_data:
  - 0
  - 25
  - 50
  - 75
  - 100

# IQTREE
# The model value includes the "-m" flag itself. The commented-out line below
# is an alternative fixed model.
# NOTE(review): presumably inserted verbatim into the iqtree command line.
iqtree_model: "-m MFP"
#iqtree_model : "-m K3Pu+F+I"
# The seed is quoted, so YAML loads it as a string.
iqtree_seed : "47321424" # keeping it consistent in a config file allows for checkpointing
# Exactly one iqtree_outgroup line should be active; the alternatives are
# kept commented out. Options #2 and #3 are comma-separated lists of names.
# Outgroup Option #1: Reference
iqtree_outgroup : "Reference"
# Outgroup Option #2: Basal modern clade
#iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic,GCA_000323845.1_ASM32384v1_genomic"
# Outgroup Option #3: Basal ancient clade
#iqtree_outgroup : "SAMEA3541826,SAMEA3541827"
# Additional option string; the value already contains the "--" flag.
iqtree_other : "--ufboot 1000"
# NOTE(review): presumably the number of independent IQ-TREE runs; confirm.
iqtree_runs : 1

Loading...